library(readr)
library(qvalue)
library(explore)
library(maditr)
library(GEOquery)
library(Matrix)
library(ggplot2)
library(dplyr)
library(pheatmap)
library(RColorBrewer)
library(celldex)
library(biomaRt)
library(org.Hs.eg.db)
library(plotly)
library(DESeq2)
library(msigdbr)
library(ape)
library(GSVA)
library(sva)
library(readxl)
library(clusterProfiler)
library(pheatmap)
library(gplots)
library(EnhancedVolcano)
library(metaRNASeq)
library(ggbeeswarm)
library(editData)
library(tidyverse)
library(corrr)
library(ggcorrplot)
library(FactoMineR)
library(factoextra)
library(esquisse)
library(corto)
library(circlize)
library(ComplexHeatmap)
library(janitor)
####Cores
#Vaccines
ann_colors = list(
vaccine = c(
"BBIBP" = "#dee2e6",
"BNT" = "#56cfe1",
"ChAd" = "#80ffdb",
"ChAd-BNT" = "#5e60ce",
"ZF2001" = "#b5179e",
"INFECTION" = "#ff4d6d",
"INFECTION-VACCINE"= "#ffccd5"),
dose = c(
"NA" = "white",
"0" = "white",
"1" = "#56cfe1",
"2" = "#5978d4",
"3" = "#b5179e"),
day = c(
"0" = "white",
"1" = "#caf0f8",
"2" = "#ade8f4",
"3" = "#90e0ef",
"4" = "#6CD5EA",
"5" = "#48cae4",
"6" = "#00b4d8",
"7" = "#0096c7",
"10" = "#0087BF",
"14" = "#0077b6",
"26" = "#015BA0",
"28" = "#023e8a",
"51" = "#03045e"),
type = c('IN'= "#e5e5e5", #1st Gen vaccines
'SU' = "#00a896",
'VV' = "#72efdd", #3rd Gen vaccines
'RNA' = "#56cfe1",
'VV-RNA' = "#5e60ce", #Heterologous
'IN-SU' = "#b5179e",
'INFECTION'= "#da1e37"), #Infection
regimen = c('HO' = "grey90",
'HE' = "#8d99ae",
"V-I-V" = "#36248f",
"V-I" = "#D7B0EE",
"I-V-I" = "#7400b8",
"I-I" = "#5390d9",
"I" = "#64dfdf"),
infection = c("none" = "white",
"1" = "#56cfe1",
"2" = "#5e60ce"),
variant = c("NA" = "white",
"Beta" = "#72efdd",
"Omicron" = "#5e60ce"),
severity = c("A (9%) MI (81%) MO (0) S (4%)" = "#caf0f8",
"A (29%) MI (57%) MO (14) S (0%)" = "#56cfe1",
"A (0%) MI (89%) MO (11%) S (0%)" = "#72efdd",
"A (9%) MI (47%) MO (21%) S (3%)" = "#64dfdf"
"S" = "#5e60ce",
Error: unexpected string constant in:
" "A (9%) MI (47%) MO (21%) S (3%)" = "#64dfdf"
"S""
# GSE189039
# GSE201530
Ler arquivos e transformar em tabela
Abra a lista do estudo e analise suas informações. Ao descompactar, surgem diversos arquivos “.featureCounts.txt.gz”, que devem ser descompactados. Para descompactar a lista de arquivos, use um loop.
Padronizando tabela de counts
Metadata
A tabela de counts fornecida pelo estudo é nomeada com o Subject ID + Timepoint e não com o codigo GSM. Por isso, teremos de tratar a tabela de metadados. Além disso, ela não possui a coluna de idade. É possível manipular estes dados no excel.
DESEQ2
Preparando dados
###### Padronize as variáveis
countData <- gse201530_counts_ready
Error: object 'gse201530_counts_ready' not found
Executando DESEQ2
Comparando tempos e condições
Nesta comparação, são encontradas DEGs de cada vacina individual nos diferentes tempos. Para comparar uma vacina com a outra em diferentes tempos, é necessário usar os argumentos “contrast” ou “name”.
#H-BNT-I
# contrast = c("disease_time", "H.BNT.I_D1", "H_D0")
# contrast = c("disease_time", "H.BNT.I_D2", "H_D0") (NAO DEU)
# contrast = c("disease_time", "H.BNT.I_D3", "H_D0")
# contrast = c("disease_time", "H.BNT.I_D4", "H_D0")
#I-BNT-I
# contrast = c("disease_time", "I.BNT.I_D2", "H_D0")
# contrast = c("disease_time", "I.BNT.I_D5", "H_D0")
#H-I
# contrast = c("disease_time", "H.I_D1", "H_D0")
#H-I-I
# contrast = c("disease_time", "H.I.I_D2", "H_D0")
#I-I
# contrast = c("disease_time", "I.I_D1", "H_D0")
# contrast = c("disease_time", "I.I_D3", "H_D0")
# contrast = c("disease_time", "I.I_D5", "H_D0")
H_BNT_I
#H_BNT_I_D1
gse201530_H_BNT_I_D1 = results(dds, contrast = c("disease_time", "H.BNT.I_D1", "H_D0")) %>%
as.data.frame() %>%
arrange(padj) %>%
mutate(condition = "H-BNT-I (D1)",
day = 1,
vaccine = "H-BNT-I",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
Error in h(simpleError(msg, call)) :
error in evaluating the argument 'x' in selecting a method for function 'as.data.frame': disease_time should be the name of a factor in the colData of the DESeqDataSet
I_BNT_I
######### I_BNT_I
#D2
gse201530_I_BNT_I_D2 = results(dds, contrast = c("disease_time", "I.BNT.I_D2", "H_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "I-BNT-I (D2)",
day = 2,
vaccine = "I-BNT-I",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse201530_I_BNT_I_D2, file = "gse201530_I_BNT_I_D2_DEGs.csv")
#D5
gse201530_I_BNT_I_D5 = results(dds, contrast = c("disease_time", "I.BNT.I_D5", "H_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "I-BNT-I (D5)",
day = 5,
vaccine = "I-BNT-I",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse201530_I_BNT_I_D5, file = "gse201530_H_BNT_I_D5_DEGs.csv")
gse201530_I_BNT_I = bind_rows(gse201530_I_BNT_I_D2,
gse201530_I_BNT_I_D5)
write.csv(gse201530_I_BNT_I, "gse201530_I_BNT_I.csv")
H-I
######### H-I
#D1
gse201530_H_I_D1 = results(dds, contrast = c("disease_time", "H.I_D1", "H_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-I (D1)",
day = 1,
vaccine = "H-I",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse201530_H_I_D1, file = "gse201530_H_I_D1_DEGs-PF.csv")
#D5
gse201530_H_I_D5 = results(dds, contrast = c("disease_time", "I.BNT.I_D5", "H_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-I (D5)",
day = 5,
vaccine = "H-I",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse201530_H_I_D5, file = "gse201530_H_I_D5_DEGs-PF.csv")
gse201530_H_I = bind_rows(gse201530_I_BNT_I_D2,
gse201530_I_BNT_I_D5)
write.csv(gse201530_H_I, "gse201530_H_I.csv")
H-I-I
######### H-I-I
#D2
gse201530_H_I_I_D2 = results(dds, contrast = c("disease_time", "H.I.I_D2", "H_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-I-I (D2)",
day = 2,
vaccine = "H-I-I",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse201530_H_I_I_D2, file = "gse201530_H_I_I_D2_DEGs-PF.csv")
I-I
######### I-I
#D1
gse201530_I_I_D1 = results(dds, contrast = c("disease_time", "I.I_D1", "H_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "I-I (D1)",
day = 1,
vaccine = "I-I",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse201530_I_I_D1, file = "gse201530_I_I_D1_DEGs-PF.csv")
#D3
gse201530_I_I_D3 = results(dds, contrast = c("disease_time", "I.I_D3", "H_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "I-I (D3)",
day = 3,
vaccine = "I-I",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse201530_I_I_D3, file = "gse201530_I_I_D3_DEGs-PF.csv")
#D5
gse201530_I_I_D5 = results(dds, contrast = c("disease_time", "I.I_D5", "H_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "I-I (D5)",
day = 5,
vaccine = "I-I",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse201530_I_I_D5, file = "gse201530_I_I_D5_DEGs-PF.csv")
gse201530_I_I = bind_rows(gse201530_I_I_D1,
gse201530_I_I_D3,
gse201530_I_I_D5)
Unir raw datasets
########## GSE201530
gse201530_DEGs = bind_rows(gse201530_I_I,
gse201530_H_I_I_D2,
gse201530_H_I,
gse201530_I_BNT_I,
gse201530_H_BNT_I) %>%
mutate(study = "GSE201530")
#Salvar
write.csv(gse201530_DEGs, file = "gse201530_DEGs_27-11-23.csv")
GSE201530_metadata_conditions = gse201530_DEGs %>%
select(condition:vaccine, study) %>%
distinct()
#Anotações
ann_vaccines_lower = ann_vaccines %>%
clean_names()
ann_vaccines_27_11_23 = bind_rows(ann_vaccines_lower,
GSE201530_metadata_conditions)
#Salvar
write.csv(ann_vaccines_27_11_23, file = 'ann_vaccines_27_11_23.csv')
Comparando tempos e condições
Nesta comparação, são encontradas DEGs de cada vacina individual nos diferentes tempos. Para comparar uma vacina com a outra em diferentes tempos, é necessário usar os argumentos “contrast” ou “name”.
H_I
######### H_I
#H_I_D10
gse189039_H_I_moderate_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.I_moderate_D10", "H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-I (D10, moderate)",
day = 10,
vaccine = "H-I",
severity = "moderate",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_H_I_moderate_D10, file = "gse189039_H_I_moderate_D10_DEGs.csv")
#H_I_D26
gse189039_H_I_moderate_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.I_moderate_D26", "H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-I (D26, moderate)",
day = 26,
severity = "moderate",
vaccine = "H-I",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_H_I_moderate_D26, file = "gse189039_H_I_moderate_D26_DEGs.csv")
#H_I_D51
gse189039_H_I_moderate_D51 = results(dds, contrast = c("disease_vac_severity_time", "H.I_moderate_D51", "H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-I (D51, moderate)",
day = 51,
vaccine = "H-I",
severity = "moderate",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_H_I_moderate_D51, file = "gse189039_H_I_moderate_D51_DEGs.csv")
#H_I_severe_D10
gse189039_H_I_severe_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.I_severe_D10", "H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-I (D10, severe)",
day = 10,
vaccine = "H-I",
severity = "severe",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_H_I_severe_D10, file = "gse189039_H_I_severe_D10_DEGs.csv")
#H_I_severe_D26
gse189039_H_I_severe_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.I_severe_D26", "H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-I (D26, severe)",
day = 26,
vaccine = "H-I",
severity = "severe",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_H_I_severe_D26, file = "gse189039_H_I_severe_D26_DEGs.csv")
#H_I_severe_D51
gse189039_H_I_severe_D51 = results(dds, contrast = c("disease_vac_severity_time", "H.I_severe_D51", "H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-I (D51, severe)",
day = 51,
vaccine = "H-I",
severity = "severe",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_H_I_severe_D51, file = "gse189039_H_I_severe_D51_DEGs.csv")
gse189039_H_I = bind_rows(gse189039_H_I_moderate_D10,
gse189039_H_I_severe_D10,
gse189039_H_I_moderate_D26,
gse189039_H_I_severe_D26,
gse189039_H_I_moderate_D51,
gse189039_H_I_severe_D51)
write.csv(gse189039_H_I, "gse189039_H_I_DEGS.csv")
H_BNT_I
######### H-BNT-I
#H_BNT-I
#MILD
gse189039_H_BNT_I_mild_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_mild_D10", "H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-BNT-I (D10, mild)",
day = 10,
vaccine = "H-BNT-I",
severity = "mild",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_H_BNT_I_mild_D10, file = "gse189039_H_BNT_I_mild_D10_DEGs.csv")
gse189039_H_BNT_I_mild_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_mild_D26", "H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-BNT-I (D26, mild)",
day = 26,
vaccine = "H-BNT-I",
severity = "mild",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_H_BNT_I_mild_D26, file = "gse189039_H_BNT_I_mild_D26_DEGs.csv")
#SEVERE
gse189039_H_BNT_I_severe_D10 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_severe_D10", "H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-BNT-I (D10, severe)",
day = 10,
vaccine = "H-BNT-I",
severity = "severe",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_H_BNT_I_severe_D10, file = "gse189039_H_BNT_I_severe_D10_DEGs.csv")
gse189039_H_BNT_I_severe_D26 = results(dds, contrast = c("disease_vac_severity_time", "H.BNT.I_severe_D26", "H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "H-BNT-I (D26, severe)",
day = 26,
vaccine = "H-BNT-I",
severity = "severe",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_H_BNT_I_severe_D26, file = "gse189039_H_BNT_I_severe_D26_DEGs.csv")
#UNIR
gse189039_H_BNT_I = bind_rows(gse189039_H_BNT_I_mild_D10,
gse189039_H_BNT_I_severe_D10,
gse189039_H_BNT_I_mild_D26,
gse189039_H_BNT_I_severe_D26)
#Salvar
write.csv(gse189039_H_BNT_I, file = "gse189039_H_BNT_I_DEGS.csv")
BNT-I-BNT
######### BNT-I-BNT
# D51
gse189039_BNT_I_BNT_severe_D51 = results(dds, contrast = c("disease_vac_severity_time",
"BNT.I.BNT_severe_D51",
"H_naive_vac_D0")) %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
arrange(padj) %>%
mutate(condition = "BNT-I-BNT (D51, severe)",
day = 51,
vaccine = "BNT-I-BNT",
severity = "severe",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_BNT_I_BNT_severe_D51, file = "gse189039_BNT_I_BNT_severe_D51_DEGs.csv")
#Aqui tive de usar o argumento name, pois contrast não estava funcionando (?)
gse189039_BNT_I_BNT_mild_D51 = results(dds, name= "disease_vac_severity_time_H_naive_vac_D0_vs_BNT.I.BNT_mild_D51") %>%
as.data.frame() %>%
filter(padj < 0.05) %>%
mutate(log2FoldChange = ifelse(log2FoldChange > 0, -log2FoldChange, abs(log2FoldChange))) %>%
arrange(padj) %>%
mutate(condition = "BNT-I-BNT (D51, mild)",
day = 51,
vaccine = "BNT-I-BNT",
severity = "mild",
direction = ifelse(log2FoldChange >= 1, "UP",
ifelse(log2FoldChange <= -1, "DOWN",
"NEUTRAL"))) %>%
rownames_to_column("genes")
#Salvar tabela
write.csv(gse189039_BNT_I_BNT_mild_D51, file = "gse189039_BNT_I_BNT_mild_D51_DEGs.csv")
#Unir
gse189039_BNT_I_BNT = bind_rows(gse189039_BNT_I_BNT_mild_D51,
gse189039_BNT_I_BNT_severe_D51)
write.csv(gse189039_BNT_I_BNT, file = "gse189039_BNT_I_BNT_DEGS.csv")
UNIR
all_degs_p_05_vac_infected = bind_rows(degs_allstudies_updown_p_05,
gse_201530_189039) %>%
mutate_all(~ replace(., is.na(.), "NA")) %>%
mutate(regimen = case_when(
regimen == "BNT-I-BNT" ~ "V-I-V",
regimen == "H-BNT-I" ~ "V-I",
regimen == "VAC-I" ~ "V-I",
regimen == "I-VAC-I" ~ "I-V-I",
regimen == "H-V-I" ~ "V-I",
regimen == "H-I-I" ~ "I-I",
regimen == "H-I" ~ "I",
TRUE ~ as.character(regimen)))
Error: object 'degs_allstudies_updown_p_05' not found
Comparar numero de participantes
Comparar sexo
Combinar tabela de genes sequenciados (raw)
genes_raw_combined %>%
group_by(study) %>%
summarise(study,
n_genes = n()) %>%
distinct()
Warning: Returning more (or less) than 1 row per `summarise()` group was deprecated in dplyr 1.1.0.
Please use `reframe()` instead.
When switching from `summarise()` to `reframe()`, remember that `reframe()` always returns an ungrouped data frame and adjust accordingly.`summarise()` has grouped output by 'study'. You can override using the `.groups` argument.
Os volcanoplots das duas vacinas são os mesmos, mas espelhados. Isso significa que definir o nivel de referencia no fator de vacinas é importante para determinar as DEGs up e down. #### ASTRAZENECA
Astrazeneca
#All
volcano_res <- EnhancedVolcano(res,
lab = rownames(res),
x = 'log2FoldChange',
y = 'pvalue',
title='Differential expression of genes - All timepoints - ChAdOx1')
# Save the plot as a PNG file
png("gse199750_volcanoplot_DEGs_allfactores_AZ.png", width = 800, height = 600)
print(volcano_res_chad)
dev.off()
#T1-T0
volcano_res_chad_1_0 <- EnhancedVolcano(res_1_0_chad,
lab = rownames(res_1_0_chad),
x = 'log2FoldChange',
y = 'pvalue',
title='Differential expression of genes - D1 vs. D0 - ChAdOx1')
# Save the plot as a PNG file
png("gse199750_volcanoplot_DEGs_D0-D1_AZ.png", width = 800, height = 600)
print(volcano_res_chad_1_0)
dev.off()
#T2-T0
volcano_res_chad_2_0 <- EnhancedVolcano(res_time0_time2_chad,
lab = rownames(res_time0_time2_chad),
x = 'log2FoldChange',
y = 'pvalue',
title='Differential expression of genes - D2 vs. D0 - ChAdOx1')
# Save the plot as a PNG file
png("gse199750_volcanoplot_DEGs_D0-D2_AZ.png", width = 800, height = 600)
print(volcano_res_chad_2_0)
dev.off()
#Plotar mais de um gráfico
plot_grid(volcano_res_chad, volcano_res_chad_1_0, volcano_res_chad_2_0, ncol = 3)
Tabela de overlaps de DEGs com teste exato de fisher
Determinar a proporção de DEGs compartilhadas entre condições diferentes e sua significância. O código abaixo deve ter como input uma tabela longa chamada “degs_all” (coluna 1: condicoes/vacinas, coluna 2: DEGs). O output será uma tabela longa com as colunas Vacina 1, Vacina 2, shared degs, not shared vacina 1, not shared vacina 2, lista com nomes das degs, valor p de fisher (teste exato de fisher).
pHeatmap
#Salvar imagem
png(file= paste0("shared", filename, "_p<10_qvalue10_PHEATMAP_numbers.png"),
width=3000,
height=2000,
res=315,
pointsize=10)
print(pheatmap_shared)
dev.off()
null device
1
#Salvar imagem
png(file= paste0("shared", filename, "_p<10_qvalue10_PHEATMAP_nonumbers.png"),
width=3000,
height=2000,
res=315,
pointsize=10)
print(pheatmap_shared_nonumbers)
dev.off()
null device
1
Chord diagram
#Chord diagram
install.packages("circlize")
library(circlize)
# sharedDEGs_UPDOWN_fisher_pvalue10
# sharedDEGs_UP_fisher_p_10_pvalue10
# sharedDEGs_DOWN_fisher_p_10_pvalue10
#Input
shared_genes_df_mirror_p10 = sharedDEGs_UP_fisher_p_10_pvalue10
#All doses
circos_alldoses = shared_genes_df_mirror_p10 %>%
select(Cond1, Cond2, Shared) %>%
mutate(Shared = as.numeric(Shared)) %>%
merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>%
select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>%
rename(cond1_dose = Dose.x,
cond2_dose = Dose.y) %>%
filter(Shared != 0)
circos_alldoses_mirror = circos_alldoses %>%
mutate(Cond1A = Cond2,
Cond2A = Cond1) %>%
select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>%
rename(Cond1 = Cond1A,
Cond2 = Cond2A) %>%
rbind(circos_alldoses)
write.csv(circos_alldoses_mirror, file = "Circos_DOWN_Alldoses_pvalue10_dose1.csv")
#Dose 1
circos_dose1 = shared_genes_df_mirror_p10 %>%
select(Cond1, Cond2, Shared) %>%
mutate(Shared = as.numeric(Shared)) %>%
merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>%
select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>%
rename(cond1_dose = Dose.x,
cond2_dose = Dose.y) %>%
filter(cond1_dose == 1,
cond2_dose == 1)
circos_dose1_mirror = circos_dose1 %>%
mutate(Cond1A = Cond2,
Cond2A = Cond1) %>%
select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>%
rename(Cond1 = Cond1A,
Cond2 = Cond2A) %>%
rbind(circos_dose1)
write.csv(circos_dose1_mirror, file = "Circos_DOWN_pvalue10_dose1.csv")
#Dose 2
circos_dose2 = shared_genes_df_mirror_p10 %>%
select(Cond1, Cond2, Shared) %>%
mutate(Shared = as.numeric(Shared)) %>%
merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>%
select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>%
rename(cond1_dose = Dose.x,
cond2_dose = Dose.y) %>%
filter(cond1_dose == 2,
cond2_dose == 2)
circos_dose2_mirror = circos_dose2 %>%
mutate(Cond1A = Cond2,
Cond2A = Cond1) %>%
select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>%
rename(Cond1 = Cond1A,
Cond2 = Cond2A) %>%
rbind(circos_dose2)
write.csv(circos_dose2_mirror, file = "Circos_DOWN_pvalue10_dose2.csv")
#Dose 3
circos_dose3 = shared_genes_df_mirror_p10 %>%
select(Cond1, Cond2, Shared) %>%
mutate(Shared = as.numeric(Shared)) %>%
merge(ann_vaccines, by.x = "Cond1", by.y="Condition", all.x=T, all.y=F) %>%
merge(ann_vaccines, by.x = "Cond2", by.y="Condition", all.x=T, all.y=F) %>%
select(Cond1, Cond2, Shared,Dose.x, Dose.y) %>%
rename(cond1_dose = Dose.x,
cond2_dose = Dose.y) %>%
filter(cond1_dose == 3,
cond2_dose == 3)
circos_dose3_mirror = circos_dose3 %>%
mutate(Cond1A = Cond2,
Cond2A = Cond1) %>%
select(Cond1A, Cond2A, Shared, cond1_dose, cond2_dose) %>%
rename(Cond1 = Cond1A,
Cond2 = Cond2A) %>%
rbind(circos_dose3)
write.csv(circos_dose3_mirror, file = "Circos_DOWN_pvalue10_dose3.csv")
Dotplot
#### Identidade e -log(q)
install.packages("ggdendro")
library(ggdendro)
library(cowplot)
library(ggtree)
library(patchwork)
#Filtrar valores com -log(p) >= 1
sharedDEGs_UP_DOWN_fisher_filtered <- sharedDEGs_UP_DOWN_fisher %>%
mutate(`Significance -log(p)` = cut(`-log(p)`,
breaks = c(-Inf, 1, 1.3, 2, Inf),
labels = c("<1", "1-1.3", "1.3-2", ">2"))) %>%
filter(Percentage_Shared_Cond1 >= 1, Percentage_Shared_Cond2 >= 1, `Significance -log(p)` != "<1")
#Salvar
write.csv(sharedDEGs_UP_DOWN_fisher_filtered, file = "sharedDEGs_UP_DOWN_fisher_filtered.csv")
#Visualizar
plot = ggplot(sharedDEGs_UP_DOWN_fisher, aes(x = Cond1, y = Cond2, color = Percentage_Shared_Cond1, size = Percentage_Shared_Cond1)) +
geom_point() +
scale_size_continuous(range = c(2, 8)) +
geom_text(aes(label = sprintf("%.f", Percentage_Shared_Cond1)), vjust = 0.5, hjust = 0.5, size = 2, color = "black") +
cowplot::theme_cowplot() +
theme(axis.line = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 10),
axis.text.y = element_text(size = 10)) +
scale_color_gradient(low = "white", high = "#ef233c", limits = c(0, 60)) +
ggtitle("%Identity between vaccines, Up and Down-Regulated, pvalue < 0.05" ) +
ylab('Cond2') +
theme(axis.ticks = element_blank()) +
guides(color = FALSE, size = FALSE)
#Salvar
ggsave(plot, file = "sharedDEGs_UP_DOWN_fisher.png")
#Display
plot
library(corto)
############## Arquivos necessários
#CellMarker_ImmuneCells.csv
#ImmuneGO_Annotated_Genes_Nested_Interesting.csv
ann_vaccines #Anotação das vacinas
############## GSE199750
gse199750_metadata_annotated = gse199750_metadata_annotated_16_11_23 %>%
mutate(condition = case_when(
condition == "BNT (V1, D6)BNT" ~ "BNT (V1, D6)",
condition == "BNT (V1, D6)MO" ~ "BNT-MO (V1, D6)",
TRUE ~ condition)) %>%
select(sample, condition, geo, day, dose, age, sex)
#Salvar
write.csv(gse199750_metadata_annotated, file = "gse199750_metadata_annotated_22-11-23.csv")
gse199750_counts_ready #matriz de expressão
############## GSE201533
gse201533_metadata_annotated = gse201533_metadata_long_annotated_14_11_23 %>% #Metadados
select(condition:vaccine)
gse201533_counts_ready #Matriz de expressão
############## GSE206023
gse206023_metadata_ready = gse206023_counts_long_annotated_14_11_23 %>% #Metadados
select(condition:age) %>%
distinct() %>%
mutate(type = if_else(type=="HO", "IN", type))
write.csv(gse206023_metadata_ready, file = "gse206023_metadata_ready.csv")
#Matriz de expressão
gse206023_counts_ready = gse206023_counts_long_annotated_14_11_23 %>%
mutate(counts = as.numeric(counts)) %>%
select(genes, sample, counts) %>%
pivot_wider(names_from = sample, values_from = counts, values_fn = mean) %>%
column_to_rownames("genes")
write.csv(gse206023_counts_ready, file = "gse206023_counts_ready.rds")
############## Inputs
ssgsea_gene = gse206023_counts_ready #Criar variável
metadata = gse206023_metadata_ready
filename = "GSE206023"
go_dataset = "CellMarker"
#Extrair sample names. O ssGSEA perde o nome no resultado.
sample_names = ssgsea_gene %>%
as.data.frame() %>%
colnames() %>%
as.data.frame() %>%
rename(sample_names = '.')
#Tabela com genes de interesse (geral) convertida em listas
genelist = CellMarker_ImmuneCells %>% #CellMarker_ImmuneCells.csv
select(cell = cell_name, genes = marker)
genelist = split(genelist$genes, genelist$cell)
############## Análise
# Run ssGSEA
nesmat = ssgsea(ssgsea_gene, genelist)
#Padronizar resultado
nesmat.result = nesmat %>%
as.data.frame() %>%
t() %>%
as.data.frame() %>%
rownames_to_column("sample") %>%
relocate(sample, .before = everything()) %>%
cbind(sample_names) %>%
select(-sample) %>%
column_to_rownames("sample_names") %>%
t() %>%
as.data.frame() %>%
rownames_to_column("process") %>%
relocate(process, .before=everything()) %>%
pivot_longer(cols = -process, names_to = "sample", values_to = "nes") %>%
mutate(pvalue = z2p(nes), #Converter NES em pvalue
qvalue = p.adjust(pvalue, method = "BH"), #ajustar para qvalue
logq = - log10(qvalue)) %>% #Calcular -log(q)
merge(metadata, by="sample", all.x=T) %>% #Unir com metadata
mutate(study = filename) %>%
merge(CellMarker_ImmuneCells, by.x ="process", by.y = "cell_name", all.x=T) %>%
select(-"...1") %>%
rename(cell_name = process)
#Salvar
write.csv(nesmat.result, file = paste0(filename, sep = "_", go_dataset, "_ssgsea_result.csv"))
print(nesmat.result)
Unir resultados
############### Unir resultados
GSE199750_CellMarker_ssgsea_result = GSE199750_CellMarker_ssgsea_result %>%
clean_names() %>%
select(cell_name:type, -marker) %>%
distinct()
GSE201533_CellMarker_ssgsea_result = GSE201533_CellMarker_ssgsea_result %>%
clean_names() %>%
select(cell_name:type, -marker) %>%
distinct()
GSE206023_CellMarker_ssgsea_result = GSE206023_CellMarker_ssgsea_result %>%
clean_names() %>%
select(cell_name:type, -marker) %>%
distinct()
GSE206023_CellMarker_ssgsea_result = GSE206023_CellMarker_ssgsea_result %>%
mutate(age = as.double(age))
ssgsea_results_unified = bind_rows(GSE199750_CellMarker_ssgsea_result,
GSE201533_CellMarker_ssgsea_result,
GSE206023_CellMarker_ssgsea_result)
ssgsea_results_unified = ssgsea_results_unified %>%
select(cell_name:immune_system) %>%
merge(ann_vaccines, by.x = "condition", by.y = "Condition")
#Salvar
write.csv(ssgsea_results_unified, file = paste0(go_dataset, "_ssgsea_results_unified.csv"))
# INPUT
nesmat.result = GSE199750_ssgsea_result
filename = "GSE199750"
#Filtrar
#Lista de processos
ssgsea_interesting = nesmat.result %>%
filter(pvalue <= 0.10) %>%
distinct()
#Boxplot (NES)
NES_plot = ggplot(ssgsea_interesting) +
aes(x = condition, y = nes, fill = vaccine) +
geom_boxplot() +
scale_fill_hue(direction = 1) +
theme_minimal() +
facet_grid(vars(process), vars(vaccine), scales = "free_x") +
labs(title = paste0(filename, "_ImmuneGO ssGSEA"),
fill = "vaccine") +
theme_bw() +
theme(plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1, size = 7))
#Salvar
ggsave(NES_plot, file= paste0(filename, "_ssGSEA_NES_qvalue010.png"), width = 10, height = 20)
ssgsea_interesting.unified = ssgsea_results_unified %>%
filter(!condition %in% c("BNT-MO (V1, D0)", "BNT-MO (V2, D0)")) %>%
mutate(day = as.factor(day),
dose = as.factor(dose))
ssgsea_interesting.general.unified = ssgsea_interesting.unified %>%
filter(process %in% c("ADAPTIVE IMMUNE SYSTEM",
"CELLULAR ADAPTIVE IMMUNE SYSTEM",
"INNATE IMMUNE SYSTEM",
"INFLAMMATION"),
pvalue <= 0.10)
ssgsea_interesting.specific.unified = ssgsea_interesting.unified %>%
filter(!process %in% c("ADAPTIVE IMMUNE SYSTEM",
"CELLULAR ADAPTIVE IMMUNE SYSTEM",
"INNATE IMMUNE SYSTEM",
"INFLAMMATION"),
pvalue <= 0.10)
data = ssgsea_results_unified
filename.unified = "ssgsea_results_unified"
#All conditions divided by dose, time on x-axis
# Filtrar os dados antes de passar para ggplot
filtered_data <- data %>%
group_by(cell_name, Vaccine, condition) %>%
summarise(n = n()) %>%
filter(n < 5) %>%
pull(condition)
NES_plot_unified_dayx <- ggplot(data) +
aes(x = day, y = nes, fill = Vaccine) +
geom_boxplot(outlier.alpha = 1) +
geom_point(data = subset(data, day %in% filtered_data),
aes(x = day, y = nes, color = Vaccine),
position = position_jitter(width = 0.2), alpha = 1) +
scale_fill_manual(values = vaxcolors) +
scale_color_manual(values = c(BBIBP = "#ffd000",
ZF2001 = "#b5179e",
BNT = "#56cfe1",
ChAd = "#80ffdb" ,
"ChAd-BNT" = "#72efdd")) +
labs(
title = "ssGSEA - All studies",
subtitle = "General ImmuneGO, by dose",
caption = "Vaccine",
fill = "Vaccines"
) +
theme_minimal() +
facet_grid(vars(Type), vars(dose), scales = "free_x") +
theme(plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
strip.text.y = element_text(angle = 0, vjust = 1, hjust = 1, size = 5),
strip.text.x = element_text(size = 10))
#Salvar
ggsave(NES_plot_unified_dayx, file = paste0(filename.unified, "_ssGSEA_NES_qvalue010_2.png"), width = 10, height = 10)
print(NES_plot_unified_dayx)
#All conditions divided by vaccine, time on x-axis
# Criar o gráfico
NES_plot_unified_dayx_vax <- ggplot(data) +
aes(x = condition, y = nes, fill = dose) +
geom_boxplot(outlier.alpha = 0.5) +
geom_point(data = subset(data, condition %in% filtered_data),
aes(x = condition, y = nes, color = "black"),
position = position_jitter(width = 0.2), alpha = 1) +
scale_fill_manual(values = dose_colors) +
scale_color_manual(values = dose_colors) +
labs(
title = "ssGSEA - All studies",
subtitle = "General ImmuneGO, by dose",
caption = "Vaccine",
fill = "Vaccines"
) +
theme_minimal() +
facet_grid(vars(process), vars(vaccine), scales = "free") +
theme(
plot.title = element_text(size = 20L, face = "bold", hjust = 0.5),
axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 7),
strip.text.y = element_text(angle = 0, vjust = 1, hjust = 1, size = 7) # Rotação do texto da facet row
)
#Salvar
ggsave(NES_plot_unified_dayx_vax, file = paste0(filename.unified, "_ssGSEA_NES_qvalue010_3.png"), width = 20, height = 20)
print(NES_plot_unified_dayx_vax)
install.packages("ggstatsplot")
library(ggstatsplot)
ssgsea_interesting = ssgsea_interesting %>%
filter(process == "INNATE IMMUNE SYSTEM") %>%
distinct()
process = "INNATE IMMUNE SYSTEM"
ggbetweenstats(
data = ssgsea_interesting,
x = condition,
y = nes,
title = paste0("ImmuneGO ssGSEA ", filename, process)
)
#Agrupado
grouped_ggbetweenstats(
data = ssgsea_interesting,
x = condition,
y = nes,
grouping.var = process,
ggsignif.args = list(textsize = 4, tip_length = 0.01),
p.adjust.method = "bonferroni",
palette = "default_jama",
package = "ggsci",
plotgrid.args = list(nrow = 1),
annotation.args = list(title = paste0("ImmuneGO ssGSEA ", filename))
)
esquisser(test)
library(ggsignif)
library(rstatix)
df = test
df$Condition <- as.factor(df$Condition)
stat.test <- df %>%
t_test(Condition ~ NES) %>%
add_significance()
test %>%
filter(Process %in% "INNATE IMMUNE RESPONSE") %>%
ggboxplot(x = "Process", y = "NES",
color = "Condition", palette = "jco") +
facet_wrap(vars(Vaccine)) +
stat_compare_means(method = "anova", label.y = 4) + # Add global p-value
stat_compare_means(label = "p.signif", method = "t.test",
ref.group = ".all.") # Comparação múltipla usando Tukey HSD
# Box plots with p-values
bxp <- ggboxplot(df, x = "Process", y = "NES", fill = "#00AFBB")
stat.test <- stat.test %>% add_xy_position(x = "supp")
bxp +
stat_pvalue_manual(stat.test, label = "p") +
scale_y_continuous(expand = expansion(mult = c(0.05, 0.1)))
https://www.datacamp.com/tutorial/pca-analysis-r https://rpkgs.datanovia.com/factoextra/index.html http://www.sthda.com/english/articles/31-principal-component-methods-in-r-practical-guide/114-mca-multiple-correspondence-analysis-in-r-essentials/ https://statisticsglobe.com/pca-before-k-means-clustering-r>
Bibliotecas
library(factoextra)
library(caret)
library(stats)
library(ggfortify)
library(ggplot2)
ssgsea_results_unified = ssgsea_results_unified %>% distinct()
ssgsea_results_unified.innate = ssgsea_results_unified %>%
filter(process %in% c("INNATE IMMUNE SYSTEM", "ANTIVIRAL AND INTERFERON", "ARPP", "INFLAMMATION", "DENDRITIC CELLS", "MACROPHAGES"))
ssgsea_results_unified.adaptive = ssgsea_results_unified %>%
filter(!process %in% c("INNATE IMMUNE SYSTEM", "ANTIVIRAL AND INTERFERON", "ARPP", "INFLAMMATION", "DENDRITIC CELLS", "MACROPHAGES")) %>%
filter(!study == "GSE201533")
data_ImmuneGO = ssgsea_results_unified.adaptive
filename = "ssgsea_results_unified.adaptive"
pca_ann = data_ImmuneGO %>%
select(sample, condition, vaccine, geo:sex)
#Converter de long para wide
matrix_genes = data_ImmuneGO %>%
select(sample, process, nes)
matrix_genes <- dcast(matrix_genes, `sample` ~ `process`,
value.var = "nes",
fun.aggregate = mean)
matrix_genes <- replace(matrix_genes, matrix_genes == "NaN", 0)
#Converter coluna 1 em rownames e excluir
matrix_data_pca = matrix_genes %>%
as.data.frame()
matrix_data_pca_ready = matrix_data_pca %>%
column_to_rownames("sample")
ann_vaccines_pca_matrix = matrix_data_pca %>%
merge(pca_ann, by.y = "sample", by.x = "sample", all.x = T, all.y = F) %>%
distinct()
ann_vaccines_pca_matrix_ready = ann_vaccines_pca_matrix %>%
select(sample, condition, vaccine, day, dose) %>%
mutate(dose = as.factor(dose),
day = as.factor(day))
# Verifique quais colunas têm variância muito baixa
nearZeroVarCols <- nearZeroVar(matrix_data_pca_ready, saveMetrics = TRUE)
matrix_data_pca_ready <- matrix_data_pca_ready[, !nearZeroVarCols$nzv]
pca_res <- prcomp(matrix_data_pca_ready, scale. = TRUE)
# Crie o gráfico de PCA
###### Condition
pca_plot_condition = autoplot(pca_res,
data = ann_vaccines_pca_matrix_ready,
colour = 'condition',
label = 0) +
labs(title=paste0(filename, "_bycondition")) + #scale fill manual
scale_fill_continuous(type = "viridis") +
theme_minimal()
###### Vaccine
pca_plot_vac = autoplot(pca_res,
data = ann_vaccines_pca_matrix_ready,
colour = 'vaccine',
label = 0) +
labs(title=paste0(filename, "_byvaccine")) + #scale fill manual
scale_color_manual(
values = c(
"BBIBP" = "#ffd000",
"BNT" = "#56cfe1",
"ChAd" = "#80ffdb",
"ChAd-BNT" = "#0077b6",
"ZF2001" = "#b5179e")) +
theme_minimal()
###### Day
pca_plot_day = autoplot(pca_res,
data = ann_vaccines_pca_matrix_ready,
colour = 'day',
label = 0) +
labs(title=paste0(filename, "_day")) + #scale fill manual
scale_color_manual(
values = c(
"0" = "grey90",
"1" = "#caf0f8",
"3" = "#90e0ef",
"6" = "#00b4d8",
"7" = "#0077b6",
"14" = "#023e8a",
"28" = "#03045e")) +
theme_minimal()
###### Dose
pca_plot_dose = autoplot(pca_res,
data = ann_vaccines_pca_matrix_ready,
colour = 'dose',
label = 0) +
labs(title=paste0(filename, "_dose")) +
scale_color_manual(
values = c("0" = "#caf0f8",
"1" = "#56cfe1",
"2" = "#5978d4",
"3" = "#b5179e"))+
theme_minimal()
# Exiba o gráfico
print(pca_plot_condition)
print(pca_plot_day)
print(pca_plot_dose)
print(pca_plot_vac)
ggsave(pca_plot_condition, filename = paste0(filename, "CONDITION_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_vac, filename = paste0(filename, "_VACCINE_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_day, filename = paste0(filename, "_DAY_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_dose, filename = paste0(filename, "_DOSE_PCA_Immune_GO_GSEA_not-ann.png"), width = 10, height = 8)
############### KNN classification
#Determinar o número de clusters para KNN
pca_scores <- data.frame(pca_res$x[, 1:2])
fviz_nbclust(pca_scores,
FUNcluster = kmeans,
method = "wss")
set.seed(123) # Set seed for randomization
kmeans_clust <- kmeans(pca_scores, # Perform k-means clustering
centers = 3) # Definir numero de clusters
#Visualizar clusters
ggp2 <- fviz_pca_ind(pca_res,
habillage = kmeans_clust$cluster,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "t",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1")
print(ggp2)
ggsave(ggp2, file = paste0(filename, "_KNN_Clustered.png"), width = 5, height = 4)
# Clusterizar por grupo
#Vaccine
pca_group_vac <- fviz_pca_ind(pca_res,
habillage = ann_vaccines_pca_matrix_ready$vaccine,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "t",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_Vac_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1")
#Dose
pca_group_dose <- fviz_pca_ind(pca_res,
habillage = ann_vaccines_pca_matrix_ready$dose,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "t",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_Dose_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1")
#Day
pca_group_day <- fviz_pca_ind(pca_res,
habillage = ann_vaccines_pca_matrix_ready$day,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "t",
label = "none",
labelsize = 0) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "_Day_KNN_Clustered.png")) +
scale_color_brewer(palette="Set1")
#Salvar
ggsave(pca_group_day, file = paste0(filename, "_Day_Clustered.png"), width = 5, height = 4)
ggsave(pca_group_dose, file = paste0(filename, "_Dose_Clustered.png"), width = 5, height = 4)
ggsave(pca_group_vac, file = paste0(filename, "_Vac_Clustered.png"), width = 5, height = 4)
#Biplot
biplot = fviz_pca_biplot(pca_res, # Visualize clusters in biplot
col.var = "black",
alpha.var = 0.5,
habillage = kmeans_clust$cluster,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "convex",
labelsize = 3,
label = "var",
palette = "Set1")
ggsave(biplot, file = paste0(filename, "_BIPLOT_KNN_Clustered.png"), width = 10, height = 8)
########Correlation plot
corr_matrix = cor(matrix_data_pca_ready)
#Plot
corrplot = ggcorrplot(corr_matrix, hc.order = TRUE) +
theme(axis.text.x = element_text(angle = 90, size = 5),
axis.text.y = element_text(size = 5))
#Salvar
ggsave(corrplot, file = paste0(filename, "_corrplot.png"),
width = 4, #Grande 20, pequeno 10
height= 4) #Grande 20, pequeno 10
print(corrplot)
data.pca <- princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs
#########Scree plot
data.pca = princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs
#########Scree plot
scree_plot = fviz_eig(data.pca,
addlabels = TRUE,
ylim = c(0, 70)) +
geom_col(color = "#00AFBB", fill = "#00AFBB") +
theme_classic()
#Salvar
ggsave(scree_plot, file = paste0(filename, "_GSEA_screeplot.png"), width = 10, height = 3)
print(scree_plot)
#Scree plot
loadings = data.frame(data.pca$loadings[, 1:3])
loadings$Genes = rownames(loadings)
loadings_1 = arrange(loadings, desc(Comp.1))
loadings_2 = arrange(loadings, desc(Comp.2))
loadings_3 = arrange(loadings, desc(Comp.3))
#Plot
loadings_1_plot = ggplot(loadings_1, aes(x = reorder(Genes, -Comp.1), y=Comp.1, fill = Comp.1)) +
ggtitle(paste0("Comp1-Comp2 Genes", filename)) +
ylab("Comp1") +
xlab("Gene sets") +
geom_bar(stat = "identity") +
theme_light() +
theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45, size = 8),
axis.text.y = element_text(size = 5),
plot.title = element_text(size = 15L, hjust = 0.5)) +
scale_fill_continuous(type = "viridis") #cores
print(loadings_1_plot)
loadings_2_plot = ggplot(loadings_2, aes(x = reorder(Genes, -Comp.2), y=Comp.2, fill = Comp.2)) +
ggtitle(paste0("Comp2-Comp3 Genes_", filename)) +
ylab("Comp2") +
xlab("Gene sets") +
geom_bar(stat = "identity") +
theme_light() +
theme(axis.text.x = element_text(vjust = 1, hjust = 1, angle = 45, size = 8),
axis.text.y = element_text(size = 5),
plot.title = element_text(size = 15L, hjust = 0.5)) +
scale_fill_continuous(type = "viridis") #cores
print(loadings_2_plot)
#Salvar
ggsave(loadings_1_plot,
file = paste0(filename, "_GSEA_genescomp1-2.png"),
width = 10, height = 5)
#Salvar
ggsave(loadings_2_plot,
file = paste0(filename, "_GSEA_genescomp2-3.png"),
width = 10, height = 5)
# Graph of the variables
fviz_pca_var_genes = fviz_pca_var(data.pca,
col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
fviz_pca_var_genes
ggsave(fviz_pca_var_genes, file = paste0(filename, "_fviz_pca_var_GSEA.png"), width = 10)
cos2.1 = fviz_cos2(data.pca, choice = "var", axes = 1:2)
cos2.2 = fviz_cos2(data.pca, choice = "var", axes = 2:3)
ggsave(cos2.1, file = paste0(filename, "_cos2_GSEA_1.png"), width = 10)
ggsave(cos2.2, file = paste0(filename, "_cos2_GSEA_2.png"), width = 10)
Processar dados
# Immune_GO_general_innate
# Immune_GO_adaptive
#INPUT
data_genes = ssgsea_results_unified
filename = "ssgsea_results_unified"
PCA
#Converter de long para wide
matrix_genes = data_genes %>%
select(study, genes, l2fc) %>%
dcast(`study` ~ `genes`,
value.var = "l2fc",
fun.aggregate = mean) %>%
as.data.frame() %>%
column_to_rownames(var = "study") %>%
replace(is.na(.), 0)
matrix_data_pca = matrix_genes %>%
rownames_to_column(var = "Condition")
matrix_data_pca_ready = matrix_data_pca %>%
column_to_rownames(var = "Condition")
ann_vaccines_pca_matrix = ann_vaccines_pca %>%
merge(matrix_data_pca,
by.x = "Condition",
all.x = F,
all.y = F) %>%
select(Condition:Efficacy)
# Verifique quais colunas têm variância muito baixa
nearZeroVarCols <- nearZeroVar(matrix_data_pca_ready, saveMetrics = TRUE)
matrix_data_pca_ready <- matrix_data_pca_ready[, !nearZeroVarCols$nzv]
pca_res <- prcomp(matrix_data_pca_ready, scale. = TRUE)
# Crie o gráfico de PCA
pca_plot_ann = autoplot(pca_res,
data = ann_vaccines_pca_matrix,
colour = 'Vaccine',
label = 1) + #1: display, 0: hide
labs(title=filename) +
theme_classic()
# scale_color_manual(values = c(BBIBP = "#black",
# ZF2001 = "black",
# BNT = "#56cfe1",
# ChAd = "#80ffdb" ,
# "ChAd-BNT" = "#72efdd"))
pca_plot_not_ann = autoplot(pca_res,
data = ann_vaccines_pca_matrix,
colour = 'Vaccine',
label = 0) + #1: display, 0: hide
labs(title=filename) +
theme_classic() +
stat_ellipse(type = "t")
#Salvar
ggsave(pca_plot_not_ann, filename = paste0(filename, "_PCA_not-ann.png"), width = 10, height = 8)
ggsave(pca_plot_ann, filename = paste0(filename, "_PCA_ann.png"), width = 10, height = 8)
# Exiba o gráfico
print(pca_plot_ann)
print(pca_plot_not_ann)
############### KNN classification
#Determinar o número de clusters para KNN
pca_scores <- data.frame(pca_res$x[, 1:2])
ggp1 <- fviz_nbclust(pca_scores,
FUNcluster = kmeans,
method = "wss")
ggp1
set.seed(123) # Set seed for randomization
kmeans_clust <- kmeans(pca_scores, # Perform k-means clustering
centers = 4) # Definir numero de clusters
#Visualizar clusters
ggp2 <- fviz_pca_ind(pca_res,
habillage = kmeans_clust$cluster,
repel = TRUE,
addEllipses = TRUE,
ellipse.type = "convex",
labelsize = 2) +
guides(color = guide_legend(override.aes = list(label = ""))) +
ggtitle(paste0(filename, "KNN_Clustered.png"))
print(ggp2)
ggsave(ggp2, file = paste0(filename, "KNN_Clustered.png"), width = 5, height = 4)
########Correlation plot
#Matriz
corr_matrix = cor(matrix_data_pca_ready)
#Plot
corrplot = ggcorrplot(corr_matrix, hc.order = TRUE) +
theme(axis.text.x = element_text(angle = 90, size = 5),
axis.text.y = element_text(size = 5))
#Salvar
ggsave(corrplot, file = paste0(filename, "_corrplot.png"),
width = 20, #Grande 20, pequeno 10
height= 20) #Grande 20, pequeno 10
print(corrplot)
#########Scree plot
data.pca = princomp(corr_matrix) #PCA
summary(data.pca) #Retornar PCs
#########Scree plot
scree_plot = fviz_eig(data.pca,
addlabels = TRUE,
ylim = c(0, 70)) +
geom_col(color = "#00AFBB", fill = "#00AFBB") +
theme_classic()
#Salvar
ggsave(scree_plot, file = paste0(filename, "_screeplot.png"), width = 10, height = 3)
print(scree_plot)
#Comp1-Comp2
loadings = data.frame(data.pca$loadings[, 1:3])
loadings$Genes = rownames(loadings)
loadings_1 = arrange(loadings, desc(Comp.1))
loadings_2 = arrange(loadings, desc(Comp.2))
loadings_3 = arrange(loadings, desc(Comp.3))
#Plot
loadings_1_plot = ggplot(loadings_1, aes(x = reorder(Genes, -Comp.1), y=Comp.1, fill = Comp.1)) +
ggtitle(paste0("Comp1-Comp2 Genes", filename)) +
ylab("Comp1") +
xlab("Genes") +
geom_bar(stat = "identity") +
theme_light() +
theme(axis.text.x = element_text(vjust = 0.1, hjust = 1, angle = 90, size = 3),
axis.text.y = element_text(size = 5),
plot.title = element_text(size = 15L, hjust = 0.5)) +
scale_fill_continuous(type = "viridis") #cores
#Salvar
ggsave(loadings_1_plot,
file = paste0(filename, "_genescomp1-2.png"),
width = 10, height = 5)
print(loadings_1_plot)
# Graph of the variables
fviz_pca_var_genes = fviz_pca_var(data.pca,
col.ind = "cos2",
gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
#Salvar
ggsave(fviz_pca_var_genes, file = paste0(filename, "_fviz_pca_var_genes.png"), width = 10)
######### COS2
cos2 = fviz_cos2(data.pca,
choice = "var",
axes = 1:2) +
theme(axis.text.x = element_text(angle=45,
size = 3)) +
theme_light()
#Salvar
ggsave(cos2, file = paste0(filename, "_cos2.png"), width = 10, height = 5)
#Colorido
fviz_pca_var(data.pca, col.var = "cos2",
gradient.cols = c("black", "orange", "green"),
repel = TRUE)
#Print
print(fviz_pca_var_genes)
print(cos2)
#GSEA
######### INPUT AQUI #########
# all_degs_p_05_vac_infected = all_degs_p_05_vac_infected_27_11_23 %>%
# mutate(condition = case_when(condition == "H-BNT-I (D1)" ~ "BNT-I (D1)",
# condition == "H-BNT-I (D3)" ~ "BNT-I (D3)",
# condition == "H-BNT-I (D4)" ~ "BNT-I (D4)",
# condition == "H-I-I (D2)" ~ "I-I (D2)",
# condition == "I-BNT-I (D2)" ~ "I-BNT-I (D2)",
# condition == "I-BNT-I (D5)" ~ "I-BNT-I (D5)",
# condition == "H-BNT-I (D10, mild)" ~ "BNT-I (D10, mild)",
# condition == "H-BNT-I (D10, severe)" ~ "BNT-I (D10, severe)",
# condition == "H-BNT-I (D26, mild)" ~ "BNT-I (D26, mild)",
# condition == "H-BNT-I (D26, severe)" ~ "BNT-I (D26, severe)",
# condition == "H-I (D10, moderate)" ~ "I (D10, moderate)",
# condition == "H-I (D10, severe)" ~ "I (D10, severe)",
# condition == "H-I (D26, moderate)" ~ "I (D26, moderate)",
# condition == "H-I (D26, severe)" ~ "I (D26, severe)",
# condition == "H-I (D51, moderate)" ~ "I (D51, moderate)",
# condition == "H-I (D51, severe)" ~ "I (D51, severe)",
# TRUE ~ condition))
#
# write.csv(all_degs_p_05_vac_infected_27_11_23, file ="all_degs_p_05_vac_infected_29_11_23.csv")
all_degs_p_05_vac_infected
#GSE199750
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT (V1, D6)") #Não enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT (V2, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT (V3, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "BNT-MO (V3, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "ChAd (V1, D6)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "ChAd (V2, D1)") #Enriquecido
# degs <- filter(all_degs_p_05_vac_infected, condition == "ChAd-BNT (V3, D1)") #Não enriquecido
#GSE201533
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V1, D3)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V1, D7)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V2, D3)") #Não enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd (V2, D7)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd-BNT (V2, D3)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ChAd-BNT (V2, D7)") #Enriquecido
#GSE206023
# degs = filter(all_degs_p_05_vac_infected, condition == "BBIBP (V3, D07)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "BBIBP (V3, D14)") #Não enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "BBIBP (V3, D28)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ZF2001 (V3, D07)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ZF2001 (V3, D14)") #Enriquecido
# degs = filter(all_degs_p_05_vac_infected, condition == "ZF2001 (V3, D28)") #Enriquecido
#GSE201530
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D1)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D3)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D4)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D2)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-BNT-I (D2)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-BNT-I (D5)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D1)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D3)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I-I (D5)")
#GSE189039
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I-BNT (D51, mild)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I-BNT (D51, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D10, mild)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D10, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D26, mild)")
# degs = filter(all_degs_p_05_vac_infected, condition == "BNT-I (D26, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D10, moderate)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D10, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D26, moderate)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D26, severe)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D51, moderate)")
# degs = filter(all_degs_p_05_vac_infected, condition == "I (D51, severe)")
#Definir nome para arquivos gerados
file_name = degs %>%
select(condition) %>%
distinct() %>%
as.character()
######### DEGs para ORA #########
gene_list_ora <- degs$genes
######### DEGs para GSEA #########
degs_gene_list_lf2c <- degs$log2fold_change
names(degs_gene_list_lf2c) <- degs$genes
degs_gene_list_lf2c<-na.omit(original_gene_list)
degs_gene_list_lf2c = sort(degs_gene_list_lf2c, decreasing = TRUE)
############ IMMUNE GO
Immune_GO_T2G = ImmuneGO_Annotated_Genes_Nested_Interesting %>%
select(gene_set_short, genes)
immune_GO_gsea <- GSEA(degs_gene_list_lf2c,
TERM2GENE = Immune_GO_T2G,
minGSSize = 2,
maxGSSize = 1000,
pvalueCutoff = 0.25,
pAdjustMethod = "BH") %>%
as.data.frame() %>%
arrange(qvalue) %>%
mutate(condition = file_name,
gsea_enrichment = "ImmuneGO")
preparing geneSet collections...
GSEA analysis...
leading edge analysis...
done...
#Salvar tabela
write.csv(immune_GO_gsea, file = paste(file_name, "ImmuneGO_GSEA.csv", sep = "_"))
########### CELL MARKER
CellMarker_genes = CellMarker_ImmuneCells %>%
select(cell_name, marker)
cell_types_gsea_immune <- GSEA(degs_gene_list_lf2c,
TERM2GENE = CellMarker_genes,
minGSSize = 1,
maxGSSize = 1000,
pvalueCutoff = 0.25,
pAdjustMethod = "BH") %>%
as.data.frame() %>%
arrange(qvalue) %>%
mutate(condition = file_name,
gsea_enrichment = "CellMarker")
preparing geneSet collections...
GSEA analysis...
leading edge analysis...
done...
#Salvar tabela
write.csv(cell_types_gsea_immune, file=paste(file_name, "CellMarker_GSEA.csv", sep = "_"))
########### VAX SIG DB
vax_sig_genes = VAX_Genes_Annotated_RAW %>%
select(STANDARD_NAME, GENE)
ora_degs_vax <- enricher(gene_list_ora,
TERM2GENE = vax_sig_genes,
minGSSize = 1,
maxGSSize = 1000,
pvalueCutoff = 0.25,
pAdjustMethod = "BH")%>%
as.data.frame() %>%
arrange(qvalue) %>%
mutate(condition = file_name,
gsea_enrichment = "VAXSig")
#Salvar
write.csv(ora_degs_vax, file=paste(file_name, "VAXSig_ORA.csv", sep = "_"))